From a66e6468bd7f04eb4c3984ce4b404259d5329d9b Mon Sep 17 00:00:00 2001 From: tsteven4 <13596209+tsteven4@users.noreply.github.com> Date: Wed, 8 Apr 2020 06:20:52 -0600 Subject: [PATCH] improve kml reader speed 5.1x. (#531) --- gtrnctr.cc | 3 +- kml.cc | 14 ++++++-- xmlgeneric.cc | 94 +++++++++++++++++++++++++++++++++------------------ xmlgeneric.h | 9 ++--- 4 files changed, 78 insertions(+), 42 deletions(-) diff --git a/gtrnctr.cc b/gtrnctr.cc index 831fb7b9a..0d362f93c 100644 --- a/gtrnctr.cc +++ b/gtrnctr.cc @@ -170,8 +170,7 @@ gtc_tags_to_ignore[] = { static void gtc_rd_init(const QString& fname) { - xml_init(fname, gtc_map, nullptr); - xml_ignore_tags(gtc_tags_to_ignore); + xml_init(fname, gtc_map, nullptr, gtc_tags_to_ignore); } static void diff --git a/kml.cc b/kml.cc index 7312df325..dfaaef939 100644 --- a/kml.cc +++ b/kml.cc @@ -315,7 +315,16 @@ const char* kml_tags_to_ignore[] = { "kml", "Document", "Folder", - nullptr, + nullptr +}; + +static +const char* kml_tags_to_skip[] = { + "Camera", + "LookAt", + "styleUrl", + "snippet", + nullptr }; // The TimeSpan/begin and TimeSpan/end DateTimes: @@ -546,8 +555,7 @@ static void kml_rd_init(const QString& fname) { - xml_init(fname, kml_map, nullptr); - xml_ignore_tags(kml_tags_to_ignore); + xml_init(fname, kml_map, nullptr, kml_tags_to_ignore, kml_tags_to_skip); } static diff --git a/xmlgeneric.cc b/xmlgeneric.cc index d832fee49..15c301959 100644 --- a/xmlgeneric.cc +++ b/xmlgeneric.cc @@ -19,24 +19,33 @@ */ +#include // for QByteArray +#include // for QHash +#include // for QIODevice, QIODevice::ReadOnly +#include // for QLatin1Char +#include // for QStringRef +#include // for QTextCodec +#include // for QXmlStreamAttributes +#include // for QXmlStreamReader, QXmlStreamReader::Characters, QXmlStreamReader::EndElement, QXmlStreamReader::IncludeChildElements, QXmlStreamReader::StartDocument, QXmlStreamReader::StartElement +#include // for qPrintable + #include "defs.h" -#include "cet_util.h" -#include "src/core/file.h" #include "xmlgeneric.h" - -#include -#include -#include -#include -#include +#include "src/core/file.h" // for File #define DEBUG_TAG 0 #if DEBUG_TAG #include #endif +enum xg_shortcut { + xg_shortcut_none = 0, + xg_shortcut_skip, + xg_shortcut_ignore +}; + static xg_tag_mapping* xg_tag_tbl; -static QSet xg_ignore_taglist; +static QHash* xg_shortcut_taglist; static QString rd_fname; static QByteArray reader_data; @@ -59,8 +68,10 @@ static QTextCodec* codec = utf8_codec; // Qt has no vanilla ASCII encoding =( xg_callback* xml_tbl_lookup(const QString& tag, xg_cb_type cb_type) { - for (xg_tag_mapping* tm = xg_tag_tbl; tm->tag_cb != nullptr; tm++) { - if (str_match(CSTR(tag), tm->tag_name) && (cb_type == tm->cb_type)) { + const QByteArray key = tag.toUtf8(); + const char* keyptr = key.constData(); + for (xg_tag_mapping* tm = xg_tag_tbl; tm->tag_cb != nullptr; ++tm) { + if ((cb_type == tm->cb_type) && str_match(keyptr, tm->tag_name)) { return tm->tag_cb; } } @@ -68,7 +79,8 @@ xml_tbl_lookup(const QString& tag, xg_cb_type cb_type) } void -xml_init(const QString& fname, xg_tag_mapping* tbl, const char* encoding) +xml_init(const QString& fname, xg_tag_mapping* tbl, const char* encoding, + const char** ignorelist, const char** skiplist) { rd_fname = fname; xg_tag_tbl = tbl; @@ -79,6 +91,17 @@ xml_init(const QString& fname, xg_tag_mapping* tbl, const char* encoding) codec = tcodec; } } + xg_shortcut_taglist = new QHash; + if (ignorelist != nullptr) { + for (; ignorelist && *ignorelist; ++ignorelist) { + xg_shortcut_taglist->insert(QString::fromUtf8(*ignorelist), xg_shortcut_ignore); + } + } + if (skiplist != nullptr) { + for (; skiplist && *skiplist; ++skiplist) { + xg_shortcut_taglist->insert(QString::fromUtf8(*skiplist), xg_shortcut_skip); + } + } } void @@ -89,12 +112,18 @@ xml_deinit() xg_tag_tbl = nullptr; xg_encoding = nullptr; codec = utf8_codec; + delete xg_shortcut_taglist; + xg_shortcut_taglist = nullptr; } -static bool -xml_consider_ignoring(const QStringRef& name) +static xg_shortcut +xml_shortcut(const QStringRef& name) { - return xg_ignore_taglist.contains(name.toString()); + QString key = name.toString(); + if (xg_shortcut_taglist->contains(key)) { + return xg_shortcut_taglist->value(key); + } + return xg_shortcut_none; } static void @@ -107,7 +136,7 @@ xml_run_parser(QXmlStreamReader& reader) switch (reader.tokenType()) { case QXmlStreamReader::StartDocument: if (!reader.documentEncoding().isEmpty()) { - codec = QTextCodec::codecForName(CSTR(reader.documentEncoding().toString())); + codec = QTextCodec::codecForName(reader.documentEncoding().toUtf8()); } if (codec == nullptr) { // According to http://www.opentag.com/xfaq_enc.htm#enc_default , we @@ -118,11 +147,17 @@ xml_run_parser(QXmlStreamReader& reader) break; case QXmlStreamReader::StartElement: - if (xml_consider_ignoring(reader.name())) { + switch (xml_shortcut(reader.name())) { + case xg_shortcut_skip: + reader.skipCurrentElement(); goto readnext; - } + case xg_shortcut_ignore: + goto readnext; + default: + break; + } - current_tag.append("/"); + current_tag.append(QLatin1Char('/')); current_tag.append(reader.qualifiedName()); cb = xml_tbl_lookup(current_tag, cb_start); @@ -144,7 +179,7 @@ xml_run_parser(QXmlStreamReader& reader) break; case QXmlStreamReader::EndElement: - if (xml_consider_ignoring(reader.name())) { + if (xml_shortcut(reader.name()) == xg_shortcut_skip) { goto readnext; } @@ -181,18 +216,11 @@ void xml_read() xml_run_parser(reader); if (reader.hasError()) { - fatal(MYNAME ":Read error: %s (%s, line %ld, col %ld)\n", + fatal(MYNAME ":Read error: %s (%s, line %lld, col %lld)\n", qPrintable(reader.errorString()), qPrintable(file.fileName()), - (long) reader.lineNumber(), - (long) reader.columnNumber()); - } -} - -void xml_ignore_tags(const char** taglist) -{ - for (; taglist && *taglist; taglist++) { - xg_ignore_taglist.insert(QString::fromUtf8(*taglist)); + reader.lineNumber(), + reader.columnNumber()); } } @@ -213,11 +241,11 @@ void xml_readstring(const char* str) xml_run_parser(reader); if (reader.hasError()) { - fatal(MYNAME ":Read error: %s (%s, line %ld, col %ld)\n", + fatal(MYNAME ":Read error: %s (%s, line %lld, col %lld)\n", qPrintable(reader.errorString()), "unknown", - (long) reader.lineNumber(), - (long) reader.columnNumber()); + reader.lineNumber(), + reader.columnNumber()); } } diff --git a/xmlgeneric.h b/xmlgeneric.h index b5f329144..c1e97fc82 100644 --- a/xmlgeneric.h +++ b/xmlgeneric.h @@ -22,7 +22,8 @@ #ifndef XMLGENERIC_H_INCLUDED_ #define XMLGENERIC_H_INCLUDED_ -#include +#include // for QString +#include // for QXmlStreamAttributes // Maybe the XmlGeneric string callback really shouldn't have a type // of its own; this was a crutch during the move from char* to QString. @@ -37,7 +38,6 @@ enum xg_cb_type { cb_end, }; -class QXmlStreamAttributes; using xg_callback = void (xg_string, const QXmlStreamAttributes*); struct xg_tag_mapping { @@ -47,9 +47,10 @@ struct xg_tag_mapping { }; extern const char* xhtml_entities; -void xml_ignore_tags(const char** taglist); -void xml_init(const QString& fname, xg_tag_mapping* tbl,const char* encoding); +void xml_init(const QString& fname, xg_tag_mapping* tbl,const char* encoding, + const char** ignorelist = nullptr, + const char** skiplist = nullptr); void xml_read(); void xml_readstring(const char* str); void xml_readprefixstring(const char* str); -- 2.30.2